What if I have many variables to compare?
I was looking at a dataset at work, and was wondering how I could carry out t-tests to check whether there were any significant differences in flavor compounds between two different species of the same fruit.
The exercise below follows a tutorial from https://www.datanovia.com/en/blog/how-to-perform-multiple-t-test-in-r-for-different-variables/. I then applied the same approach to a dataset on the physicochemical properties of bananas (from Easy Statistics for Food Science with R).
# Drop the virginica rows so that only two species remain to be compared,
# then convert to a tibble and inspect the structure.
data <- as_tibble(
  filter(iris, Species != "virginica")
)
glimpse(data)
Rows: 100
Columns: 5
$ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.…
$ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.…
$ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.…
$ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.…
$ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa,…
# Reshape to long format: one row per (observation, measured variable) pair,
# with the variable name in `variables` and the measurement in `value`.
data_long <- pivot_longer(
  data,
  cols = Sepal.Length:Petal.Width,
  names_to = "variables",
  values_to = "value"
)
glimpse(data_long)
Rows: 400
Columns: 3
$ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, se…
$ variables <chr> "Sepal.Length", "Sepal.Width", "Petal.Length", "Pe…
$ value <dbl> 5.1, 3.5, 1.4, 0.2, 4.9, 3.0, 1.4, 0.2, 4.7, 3.2, …
# Run one t-test per measured variable (value by Species), apply a Bonferroni
# correction to the resulting p-values, and add significance symbols.
stat_test <- add_significance(
  adjust_pvalue(
    t_test(group_by(data_long, variables), value ~ Species),
    method = "bonferroni"
  )
)
# Boxplots of value by Species, one facet per measured variable.
# Facets use free scales so each variable gets its own axis range.
boxplot <- ggboxplot(
  data_long,
  x = "Species",
  y = "value",
  fill = "Species",
  palette = "jco",
  legend = "none",
  # Use TRUE rather than T: T is an ordinary variable that can be reassigned.
  ggtheme = theme_pubr(border = TRUE)
) +
  facet_wrap(~variables, scales = "free")
# with sig diff
# Add the x/y coordinates needed to draw each comparison on the plot
# (add_xy_position() is from rstatix).
stat_test_plot <- stat_test %>%
  add_xy_position(x = "Species")

# Overlay the adjusted-p significance symbols on the faceted boxplots.
# hide.ns = TRUE (spelled out, since T can be reassigned) hides
# non-significant comparisons.
boxplot +
  stat_pvalue_manual(
    stat_test_plot,
    label = "p.adj.signif",
    hide.ns = TRUE,
    tip.length = 0
  )
Comparing the pH and TSS (Brix) of bananas (Cavendish vs Dream) (dataset is from Easy Statistics for Food Science with R, page 78)
# pH and total soluble solids (tss) for 12 cavendish and 12 dream bananas,
# entered column-wise; the first 12 values of each column are cavendish,
# the last 12 are dream. `variety` is stored as a factor.
banana <- tibble(
  variety = factor(rep(c("cavendish", "dream"), each = 12)),
  pH = c(
    # cavendish
    5.60, 5.57, 4.76, 5.56, 4.95, 4.84,
    5.07, 4.94, 5.04, 4.93, 5.05, 5.21,
    # dream
    4.31, 4.41, 4.35, 4.49, 4.39, 4.43,
    4.44, 4.44, 4.52, 4.79, 4.68, 4.83
  ),
  tss = c(
    # cavendish
    4.33, 4.03, 3.77, 4.10, 3.97, 4.40,
    4.50, 4.43, 4.30, 4.57, 4.30, 4.40,
    # dream
    3.67, 3.80, 3.00, 3.40, 3.67, 3.33,
    3.47, 3.57, 3.20, 3.17, 3.27, 3.30
  )
)
glimpse(banana)
Rows: 24
Columns: 3
$ variety <fct> cavendish, cavendish, cavendish, cavendish, cavendis…
$ pH <dbl> 5.60, 5.57, 4.76, 5.56, 4.95, 4.84, 5.07, 4.94, 5.04…
$ tss <dbl> 4.33, 4.03, 3.77, 4.10, 3.97, 4.40, 4.50, 4.43, 4.30…
# Reshape to long format: one row per (banana, measured variable) pair,
# with the variable name in `variables` and the measurement in `values`.
banana_long <- pivot_longer(
  banana,
  cols = pH:tss,
  names_to = "variables",
  values_to = "values"
)
glimpse(banana_long)
Rows: 48
Columns: 3
$ variety <fct> cavendish, cavendish, cavendish, cavendish, cavend…
$ variables <chr> "pH", "tss", "pH", "tss", "pH", "tss", "pH", "tss"…
$ values <dbl> 5.60, 4.33, 5.57, 4.03, 4.76, 3.77, 5.56, 4.10, 4.…
# Run one t-test per measured variable (values by variety). The p-values are
# passed through adjust_pvalue() with method = "none", so p.adj is left
# uncorrected, and significance symbols are then added.
stat_test <- add_significance(
  adjust_pvalue(
    t_test(group_by(banana_long, variables), values ~ variety),
    method = "none"
  )
)
stat_test
# A tibble: 2 × 11
variables .y. group1 group2 n1 n2 statistic df p
* <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
1 pH values cavend… dream 12 12 6.32 17.6 6.56e-6
2 tss values cavend… dream 12 12 8.75 22.0 1.28e-8
# … with 2 more variables: p.adj <dbl>, p.adj.signif <chr>
# Boxplots of values by variety, one facet per measured variable (pH, tss).
# Facets use free scales so each variable gets its own axis range.
boxplot <- ggboxplot(
  banana_long,
  x = "variety",
  y = "values",
  fill = "variety",
  palette = "jco",
  legend = "none",
  # Use TRUE rather than T: T is an ordinary variable that can be reassigned.
  ggtheme = theme_pubr(border = TRUE)
) +
  facet_wrap(~variables, scales = "free") +
  labs(title = "Cavendish Bananas have higher pH and total soluble solids. ")
# with sig diff
# Add the x/y coordinates needed to draw each comparison on the plot
# (add_xy_position() is from rstatix).
stat_test_plot <- stat_test %>%
  add_xy_position(x = "variety")

# Annotate each facet with the p.adj value followed by its significance
# symbol. hide.ns = TRUE (spelled out, since T can be reassigned) hides
# non-significant comparisons.
boxplot +
  stat_pvalue_manual(
    stat_test_plot,
    label = "p = {p.adj}{p.adj.signif}",
    hide.ns = TRUE,
    tip.length = 0
  )
https://www.datanovia.com/en/blog/how-to-perform-multiple-t-test-in-r-for-different-variables/
https://www.datanovia.com/en/blog/how-to-add-p-values-to-ggplot-facets/
For attribution, please cite this work as
lruolin (2021, July 26). pRactice corner: Multiple t-test in R. Retrieved from https://lruolin.github.io/myBlog/posts/20210726 Multiple t-test in R for different variables/
BibTeX citation
@misc{lruolin2021multiple, author = {lruolin, }, title = {pRactice corner: Multiple t-test in R}, url = {https://lruolin.github.io/myBlog/posts/20210726 Multiple t-test in R for different variables/}, year = {2021} }